library(tidyverse)
library(plotrix)    #std.error
library(hypr)       #for generating contrast coding
library(ggh4x)
library(ggthemes)
library(DiagrammeR) #grViz for causal diagrams
library(plotly)     #ggplotly for interactive plots
library(ggExtra)    #ggMarginal
library(brms)       #bayesian regression
library(bayestestR) #p_direction, hdi
library(tidybayes)  #add_epred_draws
library(emmeans)
library(doParallel) #set the number of cores


### Set global options
options(digits = 3) # set the default number of digits to 3
cores = as.integer(detectCores(logical = FALSE) * 0.7) # set the number of cores to use
registerDoParallel(cores=cores) # register the number of cores to use for parallel processing
options(mc.cores = cores)
options(brms.backend = "cmdstanr")  #this will speed up the model fitting

### MCMC options
niter = 20000  #number of iterations
nwu = 2000 #number of warmups

### Rmd settings
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE)
### Custom functions

########### pp_check_each_round ############
# Posterior predictive bar plot for the observations of a single round.
#   m          : fitted brms model
#   data       : data frame containing a `round` column
#   round_info : the round whose observations are checked (also the title)
pp_check_each_round <- function(m, data, round_info) {
  round_data <- filter(data, round == round_info)
  pp_check(m,
           type = "bars",
           ndraws = 100,
           newdata = round_data) +
    coord_cartesian(xlim = c(0, 10)) +
    ggtitle(round_info)
}


########### pp_check_each_condition ############
# Posterior predictive bar plot for the observations of a single condition.
#   m              : fitted brms model
#   data           : data frame containing a `condition` column
#   condition_info : the condition whose observations are checked (also the title)
pp_check_each_condition <- function(m, data, condition_info) {
  condition_data <- filter(data, condition == condition_info)
  pp_check(m,
           type = "bars",
           ndraws = 100,
           newdata = condition_data) +
    coord_cartesian(xlim = c(0, 10)) +
    ggtitle(condition_info)
}


########### plot_posterior ############
# Plot the posterior distributions of the fixed-effect coefficients as
# halfeye (density + 89%/95% HDI point-interval) panels, faceted by effect
# component (Visibility / Round / Interaction, optionally Intercept).
#
#   model             : brmsfit using the difference-coded `condition` contrasts
#   model2            : optional brmsfit using `condition_sum`; only its
#                       AO--SymAV coefficient is appended. NA (default) = omit.
#   interaction       : if FALSE, coefficients containing ":" are dropped
#   include_intercept : if FALSE, the intercept row is dropped
#   xlim_cond         : half-range of the x axis for the Visibility panel
#   xlim_round        : half-range of the x axis for the Round panel
#
# Returns a ggplot object.
plot_posterior <- function(model, model2=NA, interaction=FALSE, include_intercept=FALSE, 
                           xlim_cond=0.3, xlim_round=0.06){
  ### extract the posterior draws
  # Classify every b_* coefficient; the nested ifelse() is ordered so that
  # interactions take precedence over round, and round over intercept.
  posterior_beta1 <- model %>% 
    gather_draws(`b_.*`, regex = TRUE) %>% 
    mutate(intercept = str_detect(.variable, "Intercept"),
           component = ifelse(str_detect(.variable, ":"), "Interaction", 
                              ifelse(str_detect(.variable, "round"), "Round", 
                                     ifelse(str_detect(.variable, "Intercept"), "Intercept",
                                            "Visibility"))))
  
  # A brmsfit object has length > 1, so length(model2) == 1 implies the NA default.
  if (length(model2) == 1){ #if model2 is NA 
    posterior_beta = posterior_beta1
  } else {
    posterior_beta2 <- model2 %>% 
      gather_draws(`b_.*`, regex = TRUE) %>% 
      filter(.variable == "b_condition_sumAO_Sym") %>% 
      # Fix: create the same `intercept` column as posterior_beta1; without it
      # the two draw sets have different columns and cannot be combined.
      mutate(intercept = str_detect(.variable, "Intercept"),
             component = "Visibility")
    
    # bind_rows() matches columns by name (rbind() errors on tibbles whose
    # column sets differ).
    posterior_beta = bind_rows(posterior_beta1, posterior_beta2)
  }
  
  if (include_intercept == F){
    posterior_beta = posterior_beta %>% filter(component != "Intercept")
  }
  
  ### relabel coefficients and fix row / panel ordering
  # NOTE(review): labels absent from the `levels` vector below (the log(round)
  # and interaction labels, and "Intercept") become NA factor levels and are
  # dropped from the plot -- confirm this is intended when interaction = TRUE
  # or include_intercept = TRUE.
  posterior_beta = posterior_beta %>% 
    mutate(.variable = recode(.variable, 
                              "b_Intercept" = "Intercept",
                              "b_conditionAsymAV" = "SymAV--AsymAV",
                              "b_conditionAO" = "SymAV--AO",
                              "b_conditionAsym_Sym" = "AsymAV--SymAV",
                              "b_conditionAO_Asym" = "AO--AsymAV",
                              "b_condition_sumAO_Sym" = "AO--SymAV",
                              "b_round_c" = "Centered round",
                              "b_log_round_c" = "Centered log(round)",
                              "b_conditionAsym_Sym:round_c" = "Centered round: Asym--Sym",
                              "b_conditionAO_Sym:round_c" = "Centered round: AO--Sym",
                              "b_conditionAO_Asym:round_c" = "Centered round: AO--Asym",
                              "b_conditionAsym_Sym:log_round_c" = "Centered log(round): Asym--Sym",
                              "b_conditionAO_Sym:log_round_c" = "Centered log(round): AO--Sym",
                              "b_conditionAO_Asym:log_round_c" = "Centered log(round): AO--Asym"),
           .variable = factor(.variable,
                              levels = c("AO--SymAV", "AO--AsymAV", "AsymAV--SymAV", 
                                         "Centered round")),
           component = factor(component, 
                              levels = c("Intercept", "Visibility", "Round", "Interaction")))

  
  ### change variables if only main effects are plotted
  # One fill value per facet panel that will be shown.
  if (interaction == F) {
    posterior_beta = filter(posterior_beta, !str_detect(.variable, ":"))
    fill_manual_values = c("steelblue", "steelblue")
  } else{
    fill_manual_values = c("steelblue", "steelblue", "steelblue")
  }
  
  
  ### plot the posterior distributions
  p_posterior = ggplot(posterior_beta, 
                       aes(x = .value, y = fct_rev(.variable),
                           fill = component)) +
    geom_vline(xintercept = 0, size = 1) +
    # slab_alpha mapped to `intercept` de-emphasizes the intercept slab.
    stat_halfeye(aes(slab_alpha = intercept), 
                 normalize = "panels",
                 slab_alpha = 0.5,
                 .width = c(0.89, 0.95), 
                 point_interval = "median_hdi") +
    scale_fill_manual(values = fill_manual_values) +
    scale_slab_alpha_discrete(range = c(0.8, 0.4)) +
    guides(fill = "none", slab_alpha = "none") +
    labs(x = "Coefficient", y = "Effect") +
    theme_clean(base_size = 15) +
    theme(axis.text.x = element_text(colour = "black", size = 14),
          axis.text.y = element_text(colour = "black", size = 14),
          axis.title = element_text(size = 15, face = 'bold'),
          axis.title.x = element_text(vjust = -2),
          axis.title.y = element_text(vjust = 2),
          legend.position = "none",
          strip.text = element_text(size = 15, face = 'bold'),
          strip.background = element_blank(),
          panel.grid.major.x = element_line(color = "grey90", 
                                            linetype = "solid",
                                            size = 0.5),
          panel.grid.major.y = element_line(color = "grey90", 
                                            linetype = "solid",
                                            size = 0.5),
          plot.background = element_blank(),
          plot.margin = unit(c(1.1,1.1,1.1,1.1), "lines")) +
    facet_wrap(vars(component), ncol = 1, scales = "free") +
    # Per-panel x limits: first list entry = Visibility panel, second = Round.
    facetted_pos_scales(
      x = list(
        scale_x_continuous(limits = c(-xlim_cond, xlim_cond),
                           breaks = c(-0.3, 0, 0.3)),
        scale_x_continuous(limits = c(-xlim_round, xlim_round),
                           breaks = c(-0.05, 0, 0.05))))

  return(p_posterior)
}



########### pp_update_plot ############
# Prior-posterior update plots: for each coefficient, overlay the prior draws
# (blue) and the posterior draws (orange) as density curves, arranged in a grid.
#
#   post_sample : data frame of draws from as_draws_df(), containing both the
#                 prior_* columns (from sample_prior = TRUE) and the b_* columns
#   interaction : if TRUE (default), also plot the round effect and the
#                 condition-by-round interaction terms
#
# Displays the grid via gridExtra::grid.arrange().
pp_update_plot <- function(post_sample, interaction=TRUE){
  # TRUE when the draws come from the model with the condition_sum coding.
  use_sum_coding <- "b_condition_sumAO_Sym" %in% colnames(post_sample)

  # Local helper: prior vs. posterior density overlay for one coefficient.
  # Column names are passed as strings so names containing ":" need no backticks.
  draw_update <- function(posterior_col, label, prior_col = "prior_b") {
    ggplot(post_sample) +
      geom_density(aes(.data[[prior_col]]),
                   fill = "steelblue", color = "black", alpha = 0.6) +
      geom_density(aes(.data[[posterior_col]]),
                   fill = "#FC4E07", color = "black", alpha = 0.6) +
      xlab(label) +
      theme_classic()
  }

  intercept <- draw_update("b_Intercept", "Intercept",
                           prior_col = "prior_Intercept")

  ### Visibility condition
  if (use_sum_coding) {
    cond1 <- draw_update("b_condition_sumAO_Sym", "AO--Sym")
    cond2 <- draw_update("b_condition_sumAsym_Sym", "Asym--Sym")
  } else {
    cond1 <- draw_update("b_conditionAsym_Sym", "Asym--Sym")
    cond2 <- draw_update("b_conditionAO_Asym", "AO--Asym")
  }

  ### Round and condition-by-round interactions
  if (interaction) {
    p_round <- draw_update("b_round_c", "Centered round")
    if (use_sum_coding) {
      cond1_round <- draw_update("b_condition_sumAO_Sym:round_c",
                                 "Centered Round: AO--Sym")
      cond2_round <- draw_update("b_condition_sumAsym_Sym:round_c",
                                 "Centered Round: Asym--Sym")
    } else {
      cond1_round <- draw_update("b_conditionAsym_Sym:round_c",
                                 "Centered Round: Asym--Sym")
      cond2_round <- draw_update("b_conditionAO_Asym:round_c",
                                 "Centered Round: AO--Asym")
    }
  }

  ### display the plots
  if (interaction) {
    gridExtra::grid.arrange(intercept, cond1, cond2, p_round,
                            cond1_round, cond2_round,
                            ncol = 2)
  } else {
    gridExtra::grid.arrange(intercept, cond1, cond2, ncol = 2)
  }
}

1 =====Data preparation=====

1.1 Load data

1.1.1 df_dtw

Here, we will load the dtw_distance.csv dataframe.

### dtw_distance.csv
# Columns that should be converted to factors after loading.
fct_columns = c("pair", "referent", 
                "speaker_1", "round_1", "trial_1", "target_1", 
                "speaker_2", "round_2", "trial_2", "target_2")
# Load the pairwise DTW distances. Use all_of() so tidyselect resolves the
# external character vector explicitly (passing a bare vector to across()
# is deprecated and ambiguous with same-named data columns).
df_dtw = read_csv("data/dtw_distance_mirrored.csv") %>% 
  mutate(pair = as.numeric(pair),
         target = target_2,
         across(all_of(fct_columns), as.factor),
         round = round_2) %>% 
  select(pair, round, target, comparison_id, average_distance, referent:duration_2) %>% 
  filter(!is.na(round))

head(df_dtw)

1.1.2 df_condition_info

### condition info
# Read the per-pair condition assignments and relabel the visibility levels
# to match the naming used in the plots and models.
df_condition_info = read.csv("data/condition_info.csv", stringsAsFactors = TRUE)
df_condition_info = df_condition_info %>%
  mutate(
    pair = factor(pair),
    condition = factor(condition,
                       levels = c("Sym", "Asym", "AO"),
                       labels = c("SymAV", "AsymAV", "AO"))
  )
head(df_condition_info)

1.1.3 Merge dataframes

### merge dataframes
# Attach the visibility condition to every DTW comparison, keep only the
# modeling columns, and add a second copy of condition (for the alternative
# contrast coding) plus a centered round covariate.
df = left_join(df_dtw, df_condition_info, by = "pair") %>%
  select(comparison_id, pair, condition, round, target, referent, average_distance) %>%
  mutate(condition_sum = condition,
         round_c = as.integer(round) - mean(as.integer(round))) #centered round


2 =====Summarize data=====

2.1 Group by pair

2.1.1 mean by condition

# Summarize the DTW distance for the current grouping of `df`: sample size,
# mean, SD, standard error, and 95% t-based confidence limits.
# na.rm = FALSE is deliberate: unexpected NAs surface as NA summaries instead
# of being silently dropped.
summarize_data <- function(df){
  df %>% 
    summarize(n = n(),
              # dtw distance
              mean_dis = mean(average_distance, na.rm = FALSE),
              sd_dis = sd(average_distance, na.rm = FALSE),
              se_dis = std.error(average_distance, na.rm = FALSE),
              # half-width of the 95% CI, computed once instead of twice
              ci_half = qt(1 - (0.05 / 2), n - 1) * se_dis,
              lci_dis = mean_dis - ci_half,
              uci_dis = mean_dis + ci_half) %>%
    select(-ci_half) %>%   # keep the original output columns
    ungroup()
}


### create df where each row represents a pair.
df_by_pair = df %>% 
  group_by(pair, condition) %>% 
  summarize_data()


### create df where each row represents a condition.
df_by_cond = df %>% 
  group_by(condition) %>% 
  summarize_data()

df_by_cond

2.1.2 mean by round

### create df where each row represents a pair and a round.
df_by_pair_round = df %>% 
  group_by(pair, condition, round) %>% 
  summarize_data()

### create df where each row represents a round.
df_by_round = df %>% 
  group_by(round) %>% 
  summarize_data()

df_by_round

2.1.3 mean by condition x round

### create df where each row represents a condition x round.
df_by_cond_round = df %>% 
  group_by(condition, round) %>% 
  summarize_data()

df_by_cond_round



3 =====Data visualization=====

3.1 Raw distribution

3.1.1 rcp: dtw distance by condition

rcp_distance = df %>%
  ggplot(aes(x = condition, y = average_distance,
             fill = condition)) +
  ggdist::stat_halfeye(adjust = 1, width = 0.3, .width = 0,
                       point_color = NA, alpha = 0.6, justification = -0.5) +
  geom_jitter(aes(x = stage(start = condition, after_scale = x - 0.2)),
              size = 0.2, alpha = 0.3, width = 0.07, height = 0) +
  geom_boxplot(width = .2,
               outlier.shape = NA, 
               alpha = 0.7, 
               color = "black") +
  geom_point(data = df_by_cond, 
             aes(y = mean_dis), 
             shape = 21, size = 3, fill = "white") +
  labs(x="Visibility", 
       y="DTW distance") +
  scale_y_continuous(limits = c(0, 1)) +
  scale_fill_manual(values = c("#ED6B06", "#00786A", "darkgrey")) +
  theme_clean(base_size = 15) +
  theme(axis.text.x = element_text(colour = "black", size = 14),
        axis.text.y = element_text(colour = "black", size = 14),
        axis.title = element_text(size = 15, face = 'bold'),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 2),
        legend.position = "none",
        strip.text = element_text(size = 15, face = 'bold'),
        plot.background = element_blank(),
        plot.margin = unit(c(1.1,1.1,1.1,1.1), "lines"))

rcp_distance


3.1.2 bp: dtw distance by condition

bp_distance = df %>%
  ggplot(aes(x = condition, y = average_distance,
             fill = condition)) +
  geom_jitter(aes(x = stage(start = condition)),
              size = 0.15, alpha = 0.15, width = 0.07, height = 0) +
  geom_boxplot(width = .3,
               outlier.shape = NA, 
               alpha = 0.7, 
               color = "black") +
  geom_point(data = df_by_cond, 
             aes(y = mean_dis), 
             shape = 21, size = 3, fill = "white") +
  labs(x="Visibility", 
       y="DTW distance") +
  scale_y_continuous(limits = c(0, 0.9)) +
  scale_fill_manual(values = c("#ED6B06", "#00786A", "darkgrey")) +
  theme_clean(base_size = 15) +
  theme(axis.text.x = element_text(colour = "black", size = 14),
        axis.text.y = element_text(colour = "black", size = 14),
        axis.title = element_text(size = 15, face = 'bold'),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 2),
        legend.position = "none",
        strip.text = element_text(size = 15, face = 'bold'),
        plot.background = element_blank(),
        plot.margin = unit(c(1.1,1.1,1.1,1.1), "lines"))

bp_distance


3.1.3 bp: distance by condition x round

bp_distance_by_cond_round = 
  ggplot(data=df, aes(x=round, y = average_distance, fill = condition)) +
  geom_jitter(aes(x = stage(start = round)), 
              size = 0.3, alpha = 0.2, width = 0.07, height = 0.02) +
  geom_boxplot(width = .5,
               outlier.shape = NA, alpha = 0.7) +
  geom_point(data = df_by_cond_round, 
             aes(y = mean_dis, group = round), 
             shape = 21, size = 2, fill = "white") +
  labs(x = "Round",
       y = "DTW distance") +
  scale_fill_manual(values = c("#ED6B06", "#00786A", "darkgrey")) +
  scale_y_continuous(limits = c(0, 0.9)) +
  theme_clean(base_size = 15) +
  theme(axis.text.x = element_text(colour = "black", size = 14),
        axis.text.y = element_text(colour = "black", size = 14),
        axis.title = element_text(size = 15, face = 'bold'),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 2),
        legend.position = "none",
        strip.text = element_text(size = 15, face = 'bold'),
        plot.background = element_blank(),
        plot.margin = unit(c(1.1,1.1,1.1,1.1), "lines")) +
  facet_grid(cols = vars(condition))

ggplotly(bp_distance_by_cond_round)



4 =====DTW distance=====

4.1 —Causal model—

We assume the following causal model:

### dag
grViz(
  "digraph {
  graph [ranksep = 0.3]
  node [shape = plaintext]
    X [label = Visibility, fontcolor = forestgreen]
    Y [label = DTW_distance, fontcolor = darkorange]
    Z [label = Round]
  edge [minlen = 2.5]
    {X -> Y
    Z -> Y}
  {rank = same; X; Y}
  }"
)

Based on causal inference theory, including visibility alone as a fixed effect is sufficient to estimate its causal effect on DTW distance. Since additionally including round should not bias the estimates for visibility, we will include round as well.


4.2 —Contrast coding—

### visibility condition: difference coding
h_cond = hypr(AO_Asym = AsymAV ~ AO,
              Asym_Sym = SymAV ~ AsymAV,
              levels = levels(df$condition))
h_cond
## hypr object containing 2 null hypotheses:
##  H0.AO_Asym: 0 = AsymAV - AO
## H0.Asym_Sym: 0 = SymAV - AsymAV
## 
## Call:
## hypr(AO_Asym = ~AsymAV - AO, Asym_Sym = ~SymAV - AsymAV, levels = c("SymAV", 
## "AsymAV", "AO"))
## 
## Hypothesis matrix (transposed):
##        AO_Asym Asym_Sym
## SymAV   0       1      
## AsymAV  1      -1      
## AO     -1       0      
## 
## Contrast matrix:
##        AO_Asym Asym_Sym
## SymAV   1/3     2/3    
## AsymAV  1/3    -1/3    
## AO     -2/3    -1/3
contrasts(df$condition) = contr.hypothesis(h_cond)


### visibility condition: treatment coding
h_cond = hypr(AO_Sym = SymAV ~ AO,
              Asym_Sym = SymAV ~ AsymAV,
              levels = levels(df$condition))
h_cond
## hypr object containing 2 null hypotheses:
##   H0.AO_Sym: 0 = SymAV - AO
## H0.Asym_Sym: 0 = SymAV - AsymAV
## 
## Call:
## hypr(AO_Sym = ~SymAV - AO, Asym_Sym = ~SymAV - AsymAV, levels = c("SymAV", 
## "AsymAV", "AO"))
## 
## Hypothesis matrix (transposed):
##        AO_Sym Asym_Sym
## SymAV   1      1      
## AsymAV  0     -1      
## AO     -1      0      
## 
## Contrast matrix:
##        AO_Sym Asym_Sym
## SymAV   1/3    1/3    
## AsymAV  1/3   -2/3    
## AO     -2/3    1/3
contrasts(df$condition_sum) = contr.hypothesis(h_cond)


4.3 Log-normal regressions

4.3.1 Prior specification

The mediapipe estimates the locations of each keypoint in the video frame with a number between 0 (top, left) and 1 (bottom, right). As we use normalized dtw distance in which the sum of the distance is normalized by the duration of the annotated gestures, we can assume that most of the dtw distances should be between 0 and 1. Therefore, we expect the mean dtw distance to be around 0.5 and the most likely range of the dtw distance to be between 0 and 1.

Given that the dtw distance cannot be negative, it is not optimal to model the dtw distance with a normal distribution (linear regression). Instead, we will use a log-normal distribution to model the dtw distance. The log-normal distribution is a continuous probability distribution of a random variable whose logarithm is normally distributed. The log-normal distribution is a good choice for modeling the dtw distance because it is always positive and has a long tail to the right.

For log-normal regressions, priors need to be specified on the log scale. As such, we will set a prior for the intercept to be Normal(-0.7, 0.5). This means we expect the most likely distance score to be about 0.5 (exp(-0.7)) and the mean to fall between 0.18 (exp(-1.7)) and 2.01 (exp(0.7)) about 95% of the time. This prior is weakly informative, as it is still informative about the mean while allowing for a wide range of values.

As for the fixed and random effects, we will set unbiased priors of Normal(0, 0.5). This prior is “unbiased” because we are telling the model that most likely values for the slopes are around 0 (i.e., no effect).

We will use LKJ(2) prior for the correlation matrix.

# Priors for random-intercept models. Intercept Normal(-0.7, 0.5) is on the
# log scale (median distance ~ exp(-0.7) ~ 0.5); slopes/sd/sigma get
# weakly-informative Normal(0, 0.5).
priors_rint_dtw = c(
  prior(normal(-0.7, 0.5), class = Intercept),
  prior(normal(0, 0.5), class = b),
  prior(normal(0, 0.5), class = sd),
  prior(normal(0, 0.5), class = sigma))

# Priors for random-slope models: as above, plus LKJ(2) on the intercept-slope
# correlation matrix (mildly favors small correlations).
priors_rslope_dtw = c(
  prior(normal(-0.7, 0.5), class = Intercept),
  prior(normal(0, 0.5), class = b),
  prior(normal(0, 0.5), class = sd),
  prior(normal(0, 0.5), class = sigma),
  prior(lkj(2), class = cor))


4.3.2 Model 1: [pair] condition * round_c

mln_dtw_prior = brm(average_distance ~ 1 + condition * round_c +
                      (1+round_c|pair) + (1|target),
                    family = lognormal(),
                    prior = priors_rslope_dtw,
                    sample_prior = "only",
                    data = df,
                    file = "models/mln_dtw_prior")

pp_check(mln_dtw_prior, ndraws = 100) +
  coord_cartesian(xlim = c(0, 3),
                  ylim = c(0, 100))

The prior predictive check shows that the model is able to generate data that is consistent with the observed data.


4.3.2.1 Fit the model

mln_dtw = brm(average_distance ~ 1 + condition * round_c +
                (1+round_c|pair) + (1|target),
              family = lognormal(),
              prior = priors_rslope_dtw,
              data = df,
              sample_prior = T,
              warmup = nwu, iter = niter,
              control = list(adapt_delta = 0.9,
                             max_treedepth = 15),
              file = "models/mln_dtw")

model = mln_dtw
summary(model)
##  Family: lognormal 
##   Links: mu = identity; sigma = identity 
## Formula: average_distance ~ 1 + condition * round_c + (1 + round_c | pair) + (1 | target) 
##    Data: df (Number of observations: 1974) 
##   Draws: 4 chains, each with iter = 20000; warmup = 2000; thin = 1;
##          total post-warmup draws = 72000
## 
## Multilevel Hyperparameters:
## ~pair (Number of levels: 45) 
##                        Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)              0.14      0.02     0.10     0.19 1.00    21893
## sd(round_c)                0.06      0.01     0.03     0.08 1.00    26473
## cor(Intercept,round_c)    -0.20      0.23    -0.62     0.28 1.00    27317
##                        Tail_ESS
## sd(Intercept)             36879
## sd(round_c)               40867
## cor(Intercept,round_c)    41941
## 
## ~target (Number of levels: 16) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.05      0.02     0.02     0.09 1.00    19968    21033
## 
## Regression Coefficients:
##                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                    -1.23      0.03    -1.29    -1.17 1.00    21125
## conditionAO_Asym             -0.07      0.07    -0.20     0.06 1.00    20690
## conditionAsym_Sym            -0.01      0.06    -0.13     0.10 1.00    17898
## round_c                       0.02      0.01    -0.01     0.04 1.00    31397
## conditionAO_Asym:round_c     -0.04      0.03    -0.10     0.03 1.00    29983
## conditionAsym_Sym:round_c     0.01      0.03    -0.05     0.06 1.00    26412
##                           Tail_ESS
## Intercept                    32379
## conditionAO_Asym             32915
## conditionAsym_Sym            30492
## round_c                      41228
## conditionAO_Asym:round_c     40615
## conditionAsym_Sym:round_c    38341
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.41      0.01     0.40     0.43 1.00    74026    52292
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
bayestestR::hdi(model)
# bayestestR::hdi(model, ci = 0.89)

The coefficients show that the visibility condition did not have a reliable effect on the dtw distance.


4.3.2.2 Visualize the posterior distributions

plot_posterior(model)


4.3.2.3 Prior-posterior update plot

post_sample = as_draws_df(model)
pp_update_plot(post_sample)


4.3.2.4 Posterior predictive check

pp_check(model, ndraws = 100) +
  coord_cartesian(xlim = c(0, 3))

The posterior predictive check shows that the model is able to generate data that is overall consistent with the observed data.


4.3.2.5 Hypothesis testing: Bayes factor

### varying priors for sensitivity analysis
prior_size = c("xs", "s", "l", "xl")
prior_sd = c(0.1, 0.3, 1, 1.5)
bfs_cond_ao_asym = c()
bfs_cond_asym_sym = c()
bfs_round = c()

# Refit the model under each alternative slope-prior SD and collect BF10 for
# each effect via the Savage-Dickey ratio returned by hypothesis().
for (i in seq_along(prior_sd)){
  priors = c(
    prior(normal(-0.7, 0.5), class = Intercept),
    set_prior(paste0("normal(0,", prior_sd[i], ")"), class = "b"),
    prior(normal(0, 0.5), class = sd),
    prior(normal(0, 0.5), class = sigma),
    prior(lkj(2), class = cor))
  
  fname = paste0("models/mln_dtw_", prior_size[i])
  
  # save_pars(all = TRUE) is required for the Bayes factor computations below.
  fit = brm(average_distance ~ 1 + condition * round_c +
              (1+round_c|pair) + (1|target),
            family = lognormal(),
            prior = priors,
            data = df,
            sample_prior = T,
            save_pars = save_pars(all = TRUE),
            warmup = nwu, iter = niter,
            control = list(adapt_delta = 0.9, 
                           max_treedepth = 15),
            file = fname)
  
  ### BF for visibility conditions
  # ao - asym
  h = hypothesis(fit, "conditionAO_Asym = 0")
  bf = 1 / abs(h$hypothesis$Evid.Ratio)
  bfs_cond_ao_asym = c(bfs_cond_ao_asym, bf)
  
  # asym - sym
  h = hypothesis(fit, "conditionAsym_Sym = 0")
  bf = 1 / abs(h$hypothesis$Evid.Ratio)
  bfs_cond_asym_sym = c(bfs_cond_asym_sym, bf)
  
  ### BF for rounds
  # Fixed: previously this tested `model` (the main fit), so the round BF was
  # identical for every prior setting; it must use the refitted `fit`.
  h = hypothesis(fit, "round_c = 0")
  bf = 1 / abs(h$hypothesis$Evid.Ratio)
  bfs_round = c(bfs_round, bf)
}

### add BF for the main/medium model (prior sd = 0.5), inserted mid-sequence
### so the x axis runs xs < s < m < l < xl
prior_size[3:5] = c("m", prior_size[3:4])
prior_sd[3:5] = c(0.5, prior_sd[3:4])

### BF for visibility
# ao - asym
h = hypothesis(model, "conditionAO_Asym = 0")
bf = 1 / abs(h$hypothesis$Evid.Ratio)
bfs_cond_ao_asym[3:5] = c(bf, bfs_cond_ao_asym[3:4])

# asym - sym
h = hypothesis(model, "conditionAsym_Sym = 0")
bf = 1 / abs(h$hypothesis$Evid.Ratio)
bfs_cond_asym_sym[3:5] = c(bf, bfs_cond_asym_sym[3:4])

### BF for rounds
h = hypothesis(model, "round_c = 0")
bf = 1 / abs(h$hypothesis$Evid.Ratio)
bfs_round[3:5] = c(bf, bfs_round[3:4])

### make a df for BFs
# Long format: one row per prior setting x effect, ready for plotting.
df_bf = data.frame(size = prior_size,
                   sd = prior_sd,
                   ao_asym = bfs_cond_ao_asym,
                   asym_sym = bfs_cond_asym_sym,
                   round = bfs_round) %>% 
  mutate(prior = paste0("N(0, ", sd, ")")) %>% 
  pivot_longer(cols = c("ao_asym", "asym_sym", "round"),
               names_to = "Effect",
               values_to = "BF10") %>% 
  mutate(Predictor = ifelse(grepl("round", Effect), "Round", "Visibility"))

df_bf$Effect = recode(df_bf$Effect,
                          ao_asym = "AO--AsymAV",
                          asym_sym = "AsymAV--SymAV",
                          round = "Round")
#### Plot BFs ####
# Sensitivity plot: BF10 per effect across prior widths on a log10 y axis;
# the dashed line at 1 marks equal evidence for H0 and H1.
ggplot(df_bf, aes(x = prior, y = BF10, group = Effect)) +
   geom_hline(yintercept = 1, linetype="dashed") +
   geom_point(aes(color=Effect)) +
   geom_line(aes(color=Effect)) +
   theme_bw(base_size = 12)+
   theme(legend.position = "top")+
   scale_y_log10("Bayes factor (BF10)",
                 breaks = c(0.001, 0.01, 0.03, 0.1, 0.33, 1, 3, 10, 30, 100, 1e4, 1e5, 1e6, 1e7),
                 labels = c(0.001, 0.01, 0.03, 0.1, 0.33, 1, 3, 10, 30, 100, 1e4, 1e5, 1e6, 1e7)) +
   xlab("prior")


4.3.2.6 Probability of direction

p_direction(model)


4.3.2.7 Pair-wise comparison

emmeans(model, pairwise ~ condition)$contrasts
##  contrast       estimate lower.HPD upper.HPD
##  SymAV - AsymAV  -0.0131    -0.129    0.1054
##  SymAV - AO      -0.0826    -0.208    0.0453
##  AsymAV - AO     -0.0693    -0.199    0.0566
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
emmeans(model, pairwise ~ condition, level = 0.89)$contrasts
##  contrast       estimate lower.HPD upper.HPD
##  SymAV - AsymAV  -0.0131    -0.107    0.0810
##  SymAV - AO      -0.0826    -0.185    0.0198
##  AsymAV - AO     -0.0693    -0.169    0.0379
## 
## Point estimate displayed: median 
## HPD interval probability: 0.89


4.3.2.8 Visualize estimates

plot(conditional_effects(mln_dtw), ask = F)


4.3.3 Model 2: [pair] condition_sum * round_c

mln_dtw_sum_prior = brm(average_distance ~ 1 + condition_sum * round_c +
                          (1+round_c|pair) + (1|target),
                        family = lognormal(),
                        prior = priors_rslope_dtw,
                        sample_prior = "only",
                        data = df,
                        file = "models/mln_dtw_sum_prior")

pp_check(mln_dtw_sum_prior, ndraws = 100) +
  coord_cartesian(xlim = c(0, 3),
                  ylim = c(0, 10))

The prior predictive check shows that the model is able to generate data that is consistent with the observed data.


4.3.3.1 Fit the model

mln_dtw_sum = brm(average_distance ~ 1 + condition_sum * round_c +
                    (1+round_c|pair) + (1|target),
                  family = lognormal(),
                  prior = priors_rslope_dtw,
                  data = df,
                  sample_prior = T,
                  warmup = nwu, iter = niter,
                  control = list(adapt_delta = 0.9,
                                 max_treedepth = 15),
                  file = "models/mln_dtw_sum")

model1 = mln_dtw
model = mln_dtw_sum
summary(model)
##  Family: lognormal 
##   Links: mu = identity; sigma = identity 
## Formula: average_distance ~ 1 + condition_sum * round_c + (1 + round_c | pair) + (1 | target) 
##    Data: df (Number of observations: 1974) 
##   Draws: 4 chains, each with iter = 20000; warmup = 2000; thin = 1;
##          total post-warmup draws = 72000
## 
## Multilevel Hyperparameters:
## ~pair (Number of levels: 45) 
##                        Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)              0.14      0.02     0.10     0.19 1.00    26243
## sd(round_c)                0.06      0.01     0.03     0.08 1.00    30510
## cor(Intercept,round_c)    -0.20      0.24    -0.62     0.28 1.00    37154
##                        Tail_ESS
## sd(Intercept)             43399
## sd(round_c)               46781
## cor(Intercept,round_c)    50981
## 
## ~target (Number of levels: 16) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.05      0.02     0.02     0.09 1.00    22557    23543
## 
## Regression Coefficients:
##                               Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept                        -1.23      0.03    -1.29    -1.17 1.00
## condition_sumAO_Sym              -0.08      0.06    -0.21     0.05 1.00
## condition_sumAsym_Sym            -0.01      0.06    -0.13     0.11 1.00
## round_c                           0.02      0.01    -0.01     0.04 1.00
## condition_sumAO_Sym:round_c      -0.03      0.03    -0.09     0.03 1.00
## condition_sumAsym_Sym:round_c     0.01      0.03    -0.05     0.06 1.00
##                               Bulk_ESS Tail_ESS
## Intercept                        28733    39899
## condition_sumAO_Sym              27772    37885
## condition_sumAsym_Sym            24609    38168
## round_c                          45747    49636
## condition_sumAO_Sym:round_c      45481    50165
## condition_sumAsym_Sym:round_c    37105    45533
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.41      0.01     0.40     0.43 1.00   115803    53607
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
bayestestR::hdi(model)
# bayestestR::hdi(model, ci = 0.89)

The coefficients show that the visibility condition did not have a reliable effect on the dtw distance.


4.3.3.2 Visualize the posterior distributions

plot_posterior(model1, model)


4.3.3.3 Prior-posterior update plot

## Prior-posterior update plot: extract the posterior draws and overlay
## them on the priors (includes the interaction terms)
post_sample <- as_draws_df(model)
pp_update_plot(post_sample, interaction = TRUE)


4.3.3.4 Posterior predictive check

## Posterior predictive check: 100 posterior draws against the observed data,
## zoomed to the bulk of the distribution
ppc_plot <- pp_check(model, ndraws = 100)
ppc_plot +
  coord_cartesian(xlim = c(0, 3))

The posterior predictive check shows that the model can generate data that are, on the whole, consistent with the observed data.


4.3.3.5 Hypothesis testing: Bayes factor

### Sensitivity analysis: refit the model under fixed-effect priors of
### varying width and record the Bayes factor (BF10) for the AO vs Sym
### visibility contrast under each prior.
prior_size <- c("xs", "s", "l", "xl")  # labels used in the cached model filenames
prior_sd   <- c(0.1, 0.3, 1, 1.5)      # SD of the normal(0, sd) prior on class "b"

## Preallocate instead of growing the vector with c() inside the loop
bfs_cond_ao_sym <- numeric(length(prior_sd))

for (i in seq_along(prior_sd)) {
  priors <- c(
    prior(normal(-0.7, 0.5), class = Intercept),
    set_prior(paste0("normal(0,", prior_sd[i], ")"), class = "b"),
    prior(normal(0, 0.5), class = sd),
    prior(normal(0, 0.5), class = sigma),
    prior(lkj(2), class = cor))

  fname <- paste0("models/mln_dtw_sum_", prior_size[i])

  ## Same model structure as the main analysis; sample_prior = TRUE is
  ## required for the Savage-Dickey BF computed by hypothesis() below.
  fit <- brm(average_distance ~ 1 + condition_sum * round_c +
               (1 + round_c | pair) + (1 | target),
             family = lognormal(),
             prior = priors,
             data = df,
             sample_prior = TRUE,
             save_pars = save_pars(all = TRUE),
             warmup = nwu, iter = niter,
             control = list(adapt_delta = 0.9,
                            max_treedepth = 15),
             file = fname)  # cached on disk; refit only if the file is absent

  ## hypothesis() returns the evidence ratio in favor of the point null
  ## (BF01); invert it to get BF10 for the AO vs Sym contrast.
  h <- hypothesis(fit, "condition_sumAO_Sym = 0")
  bfs_cond_ao_sym[i] <- 1 / abs(h$hypothesis$Evid.Ratio)
}

### Insert the main (medium, sd = 0.5) prior into position 3 of each vector,
### between the "s" and "l" entries, so the plot is ordered by prior width
prior_size <- append(prior_size, "m", after = 2)
prior_sd   <- append(prior_sd, 0.5, after = 2)

## BF10 for the AO vs Sym contrast from the main model
h  <- hypothesis(model, "condition_sumAO_Sym = 0")
bf <- 1 / abs(h$hypothesis$Evid.Ratio)
bfs_cond_ao_sym <- append(bfs_cond_ao_sym, bf, after = 2)


### Assemble the Bayes-factor table for this effect and append it to the
### running df_bf, fixing factor level order for plotting
df_bf_temp <- data.frame(
  size   = prior_size,
  sd     = prior_sd,
  Effect = "AO--SymAV",
  BF10   = bfs_cond_ao_sym
) %>%
  mutate(prior = paste0("N(0, ", sd, ")"),
         Predictor = "Visibility")

effect_levels    <- c("AO--SymAV", "AO--AsymAV", "AsymAV--SymAV", "Round")
predictor_levels <- c("Visibility", "Round")

df_bf_new <- rbind(df_bf, df_bf_temp) %>%
  mutate(Effect    = factor(Effect, levels = effect_levels),
         Predictor = factor(Predictor, levels = predictor_levels))

df_bf_new %>% arrange(Effect, sd)
#### Plot BFs ####
## Sensitivity of BF10 to prior width, faceted by predictor.
## Fix: the break vector had 0.03 and 0.01 transposed (was
## 0.001, 0.03, 0.01, ...); breaks are now in ascending order.
bf_breaks <- c(0.001, 0.01, 0.03, 0.1, 0.33, 1, 3, 10, 30, 100)

ggplot(df_bf_new,
       aes(x = factor(sd), y = BF10, group = Effect)) +
  geom_hline(yintercept = 1, linetype = "dashed") +  # BF10 = 1: no evidence either way
  geom_point(aes(color = Effect)) +
  geom_line(aes(color = Effect)) +
  facet_wrap(vars(Predictor)) +
  theme_clean(base_size = 15) +
  theme(axis.text.x = element_text(colour = "black", size = 14),
        axis.text.y = element_text(colour = "black", size = 14),
        axis.title = element_text(size = 15, face = 'bold'),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 2),
        legend.position = "right",
        strip.text = element_text(size = 15, face = 'bold'),
        plot.background = element_blank(),
        plot.margin = unit(c(1.1, 1.1, 1.1, 1.1), "lines")) +
  scale_y_log10("Bayes factor (BF10)",
                breaks = bf_breaks,
                labels = bf_breaks) +
  xlab("SD for the prior")


4.3.3.6 Probability of direction

p_direction(model)



5 =====Session info=====

sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22631)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] rstan_2.32.6       StanHeaders_2.32.7 doParallel_1.0.17  iterators_1.0.14  
##  [5] foreach_1.5.2      emmeans_1.10.7     tidybayes_3.0.7    bayestestR_0.15.2 
##  [9] brms_2.22.0        Rcpp_1.0.12        ggExtra_0.10.1     plotly_4.10.4     
## [13] DiagrammeR_1.0.11  ggthemes_5.1.0     ggh4x_0.3.1        hypr_0.2.8        
## [17] plotrix_3.8-4      lubridate_1.9.3    forcats_1.0.0      stringr_1.5.1     
## [21] dplyr_1.1.4        purrr_1.0.2        readr_2.1.5        tidyr_1.3.1       
## [25] tibble_3.2.1       ggplot2_3.5.2      tidyverse_2.0.0   
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.1-0     estimability_1.5.1   QuickJSR_1.1.3      
##   [4] rstudioapi_0.17.1    farver_2.1.1         svUnit_1.0.6        
##   [7] bit64_4.0.5          mvtnorm_1.2-4        bridgesampling_1.1-2
##  [10] codetools_0.2-18     cachem_1.0.8         knitr_1.49          
##  [13] bayesplot_1.11.1     jsonlite_1.8.8       ggdist_3.3.2        
##  [16] shiny_1.11.1         compiler_4.2.2       httr_1.4.7          
##  [19] backports_1.4.1      Matrix_1.5-1         fastmap_1.1.1       
##  [22] lazyeval_0.2.2       cli_3.6.2            later_1.3.2         
##  [25] visNetwork_2.1.2     htmltools_0.5.8.1    tools_4.2.2         
##  [28] coda_0.19-4.1        gtable_0.3.6         glue_1.7.0          
##  [31] reshape2_1.4.4       posterior_1.6.1      V8_6.0.6            
##  [34] jquerylib_0.1.4      vctrs_0.6.5          nlme_3.1-160        
##  [37] crosstalk_1.2.1      insight_1.1.0        tensorA_0.36.2.1    
##  [40] xfun_0.53            ps_1.7.6             timechange_0.3.0    
##  [43] mime_0.12            miniUI_0.1.1.1       lifecycle_1.0.4     
##  [46] MASS_7.3-58.1        scales_1.3.0         vroom_1.6.5         
##  [49] ragg_1.3.0           hms_1.1.3            promises_1.3.3      
##  [52] Brobdingnag_1.2-9    inline_0.3.21        RColorBrewer_1.1-3  
##  [55] curl_6.2.1           yaml_2.3.8           gridExtra_2.3       
##  [58] loo_2.8.0            sass_0.4.9           stringi_1.8.3       
##  [61] checkmate_2.3.1      pkgbuild_1.4.6       cmdstanr_0.8.1      
##  [64] rlang_1.1.3          pkgconfig_2.0.3      systemfonts_1.0.6   
##  [67] matrixStats_1.3.0    distributional_0.5.0 pracma_2.4.4        
##  [70] evaluate_1.0.3       lattice_0.20-45      rstantools_2.4.0    
##  [73] htmlwidgets_1.6.4    labeling_0.4.3       processx_3.8.4      
##  [76] bit_4.0.5            tidyselect_1.2.1     plyr_1.8.9          
##  [79] magrittr_2.0.3       R6_2.6.1             generics_0.1.3      
##  [82] pillar_1.10.1        withr_3.0.2          datawizard_1.0.1    
##  [85] abind_1.4-8          crayon_1.5.3         arrayhelpers_1.1-0  
##  [88] tzdb_0.4.0           rmarkdown_2.29       grid_4.2.2          
##  [91] data.table_1.15.4    digest_0.6.35        xtable_1.8-4        
##  [94] httpuv_1.6.15        textshaping_0.3.7    stats4_4.2.2        
##  [97] RcppParallel_5.1.7   munsell_0.5.1        viridisLite_0.4.2   
## [100] bslib_0.9.0